# -*- coding: utf-8 -*-
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector

class DmozSpider(BaseSpider):
    """Minimal spider that fetches one pythontab.com article page.

    Uses the legacy (pre-1.0) Scrapy API: ``BaseSpider`` and
    ``HtmlXPathSelector``.  On each response it dumps the raw HTML to
    ``a.html`` in the working directory and selects all ``<li>`` items
    under ``<ul>`` elements (the selection is currently unused beyond
    being computed — extraction of title/link/desc is left for later).
    """

    name = "dmoz"
    allowed_domains = ["pythontab.com"]
    start_urls = [
        "http://www.pythontab.com/html/2013/pythonhexinbiancheng_0814/541.html",
    ]

    def parse(self, response):
        """Default callback: save the raw page body, then select list items.

        Args:
            response: the downloaded HTTP response for a start URL.

        Side effects:
            Overwrites ``a.html`` in the current working directory with
            the raw (bytes) body of the response.
        """
        # BUG FIX: the original wrote ``hxs`` (an HtmlXPathSelector object)
        # to a binary file, which raises TypeError — a file opened 'wb'
        # needs bytes.  ``response.body`` is the raw page bytes, which is
        # what the commented-out alternate parse() was doing as well.
        with open('a.html', 'wb') as f:
            f.write(response.body)

        hxs = HtmlXPathSelector(response)
        # Selected for future extraction (a/text(), a/@href, text());
        # not yet consumed — kept to preserve the original's behavior.
        sites = hxs.select('//ul/li')